In this R Markdown document we will display and document each step of CS329e R Project .

Project Requirements http://www.cs.utexas.edu/~cannata/dataVis/Projects/R%20Project%203/R%20Project%20Requirements.html.

In the source folder 01 Data, the R script Access Oracle Database.R was created to connect to Dr. Cannata’s Orcale database and generate a data frame from the uploaded Vocabulary.csv file. The RCurl and jsonlite package were loaded to allow general HTTP requests and process the results returned by the Web server.

Describe joined datasets.

source("../01 Data/Access Oracle Database.R", echo = TRUE)
## 
## > require("RCurl")
## Loading required package: RCurl
## Loading required package: bitops
## 
## > require("jsonlite")
## Loading required package: jsonlite
## 
## Attaching package: 'jsonlite'
## 
## The following object is masked from 'package:utils':
## 
##     View
## 
## > require("dplyr")
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## > require("ggplot2")
## Loading required package: ggplot2
## 
## > quake <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"SELECT r1.LATITUDE, r1.LONGITUDE, r1.PLACE, r2.LATITUDE, r2. .... [TRUNCATED] 
## 
## > quake %>% ggplot(aes(x = LATITUDE, y = LONGITUDE, 
## +     color = PLACE)) + geom_point()

A second R script subset.R was created to display the vocab data frame that was previously created in the last step. The head(), tail(), tbl_df(), glimpse(), and summary() functions were used to return subsets of this data frame.

source("../01 Data/Subset Data.R", echo = TRUE)
## 
## > require("dplyr")
## 
## > head(quake)
##   LATITUDE LONGITUDE                        PLACE LATITUDE.1 LONGITUDE.1
## 1  37.1418 -117.2618   51km WNW of Beatty, Nevada    35.5500    -96.7640
## 2  37.1418 -117.2618   51km WNW of Beatty, Nevada    35.5310    -96.7880
## 3  41.8797 -119.6241 65km ESE of Lakeview, Oregon    34.7740    -97.5960
## 4  41.8797 -119.6241 65km ESE of Lakeview, Oregon    35.6810    -97.0980
## 5  41.8797 -119.6241 65km ESE of Lakeview, Oregon    36.1309    -97.6291
## 6  41.8797 -119.6241 65km ESE of Lakeview, Oregon    35.8913    -97.2752
##                         PLACE.1 MAG MAG.1
## 1                      Oklahoma 4.8   4.8
## 2                      Oklahoma 4.8   4.8
## 3                      Oklahoma 4.2   4.2
## 4   8km ENE of Luther, Oklahoma 4.2   4.2
## 5  20km N of Crescent, Oklahoma 4.2   4.2
## 6 6km SSW of Langston, Oklahoma 4.2   4.2
## 
## > tail(quake)
##      LATITUDE LONGITUDE               PLACE LATITUDE.1 LONGITUDE.1
## 8370   39.399  -121.601 Northern California    35.6073    -97.3863
## 8371   40.418  -120.578 Northern California    35.5414    -96.7537
## 8372   40.394  -120.518 Northern California    33.0300   -100.7660
## 8373   40.394  -120.518 Northern California    35.1920    -97.3200
## 8374   40.394  -120.518 Northern California    35.6860    -97.0890
## 8375   40.394  -120.518 Northern California    35.7727    -97.4677
##                            PLACE.1 MAG MAG.1
## 8370   9km ESE of Edmond, Oklahoma 4.5   4.5
## 8371                      Oklahoma 4.7   4.7
## 8372                 western Texas 4.4   4.4
## 8373                      Oklahoma 4.4   4.4
## 8374  12km ENE of Luther, Oklahoma 4.4   4.4
## 8375 12km SSW of Guthrie, Oklahoma 4.4   4.4
## 
## > tbl_df(quake)
## Source: local data frame [8,375 x 8]
## 
##    LATITUDE LONGITUDE                        PLACE LATITUDE.1 LONGITUDE.1
## 1   37.1418 -117.2618   51km WNW of Beatty, Nevada    35.5500    -96.7640
## 2   37.1418 -117.2618   51km WNW of Beatty, Nevada    35.5310    -96.7880
## 3   41.8797 -119.6241 65km ESE of Lakeview, Oregon    34.7740    -97.5960
## 4   41.8797 -119.6241 65km ESE of Lakeview, Oregon    35.6810    -97.0980
## 5   41.8797 -119.6241 65km ESE of Lakeview, Oregon    36.1309    -97.6291
## 6   41.8797 -119.6241 65km ESE of Lakeview, Oregon    35.8913    -97.2752
## 7   41.8797 -119.6241 65km ESE of Lakeview, Oregon    35.9466    -96.7594
## 8   41.8797 -119.6241 65km ESE of Lakeview, Oregon    36.8479    -97.7016
## 9   41.8797 -119.6241 65km ESE of Lakeview, Oregon    36.2618    -97.2643
## 10  41.8797 -119.6241 65km ESE of Lakeview, Oregon    36.8147    -98.2915
## ..      ...       ...                          ...        ...         ...
## Variables not shown: PLACE.1 (fctr), MAG (dbl), MAG.1 (dbl)
## 
## > glimpse(quake)
## Observations: 8375
## Variables:
## $ LATITUDE    (dbl) 37.1418, 37.1418, 41.8797, 41.8797, 41.8797, 41.87...
## $ LONGITUDE   (dbl) -117.2618, -117.2618, -119.6241, -119.6241, -119.6...
## $ PLACE       (fctr) 51km WNW of Beatty, Nevada, 51km WNW of Beatty, N...
## $ LATITUDE.1  (dbl) 35.5500, 35.5310, 34.7740, 35.6810, 36.1309, 35.89...
## $ LONGITUDE.1 (dbl) -96.7640, -96.7880, -97.5960, -97.0980, -97.6291, ...
## $ PLACE.1     (fctr) Oklahoma, Oklahoma, Oklahoma, 8km ENE of Luther, ...
## $ MAG         (dbl) 4.8, 4.8, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, ...
## $ MAG.1       (dbl) 4.8, 4.8, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, ...
## 
## > summary(quake)
##     LATITUDE       LONGITUDE     
##  Min.   :32.73   Min.   :-124.5  
##  1st Qu.:34.37   1st Qu.:-120.1  
##  Median :36.82   Median :-118.5  
##  Mean   :36.48   Mean   :-118.7  
##  3rd Qu.:37.64   3rd Qu.:-116.7  
##  Max.   :41.94   Max.   :-114.7  
##                                  
##                                   PLACE        LATITUDE.1   
##  Central California                  :2225   Min.   :32.85  
##  Southern California                 :2203   1st Qu.:35.59  
##  Nevada                              :1321   Median :35.95  
##  Northern California                 :1077   Mean   :35.96  
##  Greater Los Angeles area, California: 538   3rd Qu.:36.63  
##  Long Valley area, California        : 342   Max.   :39.16  
##  (Other)                             : 669                  
##   LONGITUDE.1                               PLACE.1          MAG      
##  Min.   :-100.77   Oklahoma                     :1746   Min.   :4.00  
##  1st Qu.: -97.71   western Texas                : 588   1st Qu.:4.00  
##  Median : -97.48   4km S of Cushing, Oklahoma   : 498   Median :4.10  
##  Mean   : -97.73   6km SSW of Langston, Oklahoma: 459   Mean   :4.14  
##  3rd Qu.: -97.10   12km SSW of Guthrie, Oklahoma: 420   3rd Qu.:4.20  
##  Max.   : -95.88   19km S of Medford, Oklahoma  : 300   Max.   :5.60  
##                    (Other)                      :4364                 
##      MAG.1     
##  Min.   :4.00  
##  1st Qu.:4.00  
##  Median :4.10  
##  Mean   :4.14  
##  3rd Qu.:4.20  
##  Max.   :5.60  
##                
## 
## > str(quake)
## 'data.frame':    8375 obs. of  8 variables:
##  $ LATITUDE   : num  37.1 37.1 41.9 41.9 41.9 ...
##  $ LONGITUDE  : num  -117 -117 -120 -120 -120 ...
##  $ PLACE      : Factor w/ 27 levels "10km S of Rancho Palos Verdes, California",..: 7 7 11 11 11 11 11 11 11 11 ...
##  $ LATITUDE.1 : num  35.5 35.5 34.8 35.7 36.1 ...
##  $ LONGITUDE.1: num  -96.8 -96.8 -97.6 -97.1 -97.6 ...
##  $ PLACE.1    : Factor w/ 26 levels "12km ENE of Luther, Oklahoma",..: 25 25 25 17 6 15 12 13 10 19 ...
##  $ MAG        : num  4.8 4.8 4.2 4.2 4.2 ...
##  $ MAG.1      : num  4.8 4.8 4.2 4.2 4.2 ...

In this data wrangling section, several work flows are constructed using the %>% pipe operator which revealed new information about the data set.

source("../02 Data Wrangling/Joining Data.R", echo = TRUE)
## 
## > require("RCurl")
## 
## > require("jsonlite")
## 
## > require("dplyr")
## 
## > require("tidyr")
## Loading required package: tidyr
## 
## > require("ggplot2")
## 
## > Above4cali <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"SELECT * FROM ABOVE4CALI\""), 
## +     httpheader = c(DB  .... [TRUNCATED] 
## 
## > Above4OU <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"SELECT * FROM ABOVE4OU\""), 
## +     httpheader = c(DB = "j ..." ... [TRUNCATED] 
## 
## > Recentca <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"SELECT * FROM RECENTCA\""), 
## +     httpheader = c(DB = "j ..." ... [TRUNCATED] 
## 
## > Recentou <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"SELECT * FROM RECENTOU\""), 
## +     httpheader = c(DB = "j ..." ... [TRUNCATED] 
## 
## > names(Above4OU)
##  [1] "TIME"      "LATITUDE"  "LONGITUDE" "DEPTH"     "MAG"      
##  [6] "MAGTYPE"   "NST"       "GAP"       "DMIN"      "RMS"      
## [11] "NET"       "ID"        "UPDATED"   "PLACE"     "TYPE"     
## 
## > names(Above4cali)
##  [1] "TIME"      "LATITUDE"  "LONGITUDE" "DEPTH"     "MAG"      
##  [6] "MAGTYPE"   "NST"       "GAP"       "DMIN"      "RMS"      
## [11] "NET"       "ID"        "UPDATED"   "PLACE"     "TYPE"     
## 
## > names(Recentca)
##  [1] "TIME"      "LATITUDE"  "LONGITUDE" "DEPTH"     "MAG"      
##  [6] "MAGTYPE"   "NST"       "GAP"       "DMIN"      "RMS"      
## [11] "NET"       "ID"        "UPDATED"   "PLACE"     "TYPE"     
## 
## > names(Recentou)
##  [1] "TIME"      "LATITUDE"  "LONGITUDE" "DEPTH"     "MAG"      
##  [6] "MAGTYPE"   "NST"       "GAP"       "DMIN"      "RMS"      
## [11] "NET"       "ID"        "UPDATED"   "PLACE"     "TYPE"     
## 
## > left_join(Above4cali, Above4OU, by = "MAG") %>% ggplot(aes(x = LATITUDE.x, 
## +     y = LONGITUDE.x, color = PLACE.x)) + geom_point()

## 
## > right_join(Above4cali, Above4OU, by = "DEPTH") %>% 
## +     ggplot(aes(x = LATITUDE.x, y = LONGITUDE.x, color = PLACE.x)) + 
## +     geom_point()

## 
## > inner_join(Above4cali, Above4OU, by = "MAG") %>% ggplot(aes(x = LATITUDE.x, 
## +     y = LONGITUDE.x, color = PLACE.x)) + geom_point()

## 
## > full_join(Above4cali, Above4OU, by = "DEPTH") %>% 
## +     ggplot(aes(x = LATITUDE.x, y = LONGITUDE.x, color = PLACE.x)) + 
## +     geom_point()

## 
## > semi_join(Above4cali, Above4OU, by = "MAG") %>% ggplot(aes(x = LATITUDE, 
## +     y = LONGITUDE, color = PLACE)) + geom_point()

## 
## > anti_join(Above4cali, Above4OU, by = "DEPTH") %>% 
## +     ggplot(aes(x = LATITUDE, y = LONGITUDE, color = PLACE)) + 
## +     geom_point()

## 
## > bind_rows(Above4cali, Above4OU) %>% ggplot(aes(x = LATITUDE, 
## +     y = LONGITUDE, color = PLACE)) + geom_point()

Here are our Beautiful Graphs. The first graph shows the typical range and medium of magnitudes per state. Outliers are also shown. The second graph shows the number of earthquakes above magnitude 4 that have occurred since 1950 in the selected state. This exemplifies just how many powerful earthquakes have occurred in California compared to the other states.

source("../02 Data Wrangling/Data Wrangling.R", echo = TRUE)
## 
## > require("dplyr")
## 
## > require("tidyr")
## 
## > require("jsonlite")
## 
## > names(Above4OU)
##  [1] "TIME"      "LATITUDE"  "LONGITUDE" "DEPTH"     "MAG"      
##  [6] "MAGTYPE"   "NST"       "GAP"       "DMIN"      "RMS"      
## [11] "NET"       "ID"        "UPDATED"   "PLACE"     "TYPE"     
## 
## > names(Above4cali)
##  [1] "TIME"      "LATITUDE"  "LONGITUDE" "DEPTH"     "MAG"      
##  [6] "MAGTYPE"   "NST"       "GAP"       "DMIN"      "RMS"      
## [11] "NET"       "ID"        "UPDATED"   "PLACE"     "TYPE"     
## 
## > names(Recentca)
##  [1] "TIME"      "LATITUDE"  "LONGITUDE" "DEPTH"     "MAG"      
##  [6] "MAGTYPE"   "NST"       "GAP"       "DMIN"      "RMS"      
## [11] "NET"       "ID"        "UPDATED"   "PLACE"     "TYPE"     
## 
## > names(Recentou)
##  [1] "TIME"      "LATITUDE"  "LONGITUDE" "DEPTH"     "MAG"      
##  [6] "MAGTYPE"   "NST"       "GAP"       "DMIN"      "RMS"      
## [11] "NET"       "ID"        "UPDATED"   "PLACE"     "TYPE"     
## 
## > justCA <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"SELECT MAG,PLACE FROM CLEANEDRECENTCAL \""), 
## +     httphea .... [TRUNCATED] 
## 
## > justOU <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"SELECT MAG, PLACE FROM CLEANEDRECENTOU \""), 
## +     httphea .... [TRUNCATED] 
## 
## > combinedAll <- bind_rows(justCA, justOU)
## 
## > p2 <- combinedAll %>% ggplot(aes(x = PLACE, y = MAG, 
## +     color = PLACE)) + geom_boxplot()
## 
## > p2 <- p2 + theme(legend.position = "none") + labs(x = "State", 
## +     y = "Magnitude") + theme(plot.title = element_text(size = 20, 
## +     face = "b ..." ... [TRUNCATED] 
## 
## > p2
## 
## > above4CA <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"SELECT MAG, PLACE FROM CLEANABOVE4CALI \""), 
## +     httph .... [TRUNCATED] 
## 
## > above4OU <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"SELECT MAG, PLACE FROM CLEANEDABOVE4OU \""), 
## +     httph .... [TRUNCATED] 
## 
## > p3 <- above4CA %>% ggplot(aes(x = PLACE, color = PLACE)) + 
## +     geom_bar(stat = "bin")
## 
## > p3 <- p3 + scale_y_continuous(limits = c(0, 1250)) + 
## +     theme(legend.position = "none") + labs(x = "State", y = "Number of Level 4 and Above Ear ..." ... [TRUNCATED] 
## 
## > p3 <- p3 + theme(plot.title = element_text(size = 20, 
## +     face = "bold"))
## 
## > p4 <- above4OU %>% ggplot(aes(x = PLACE, color = PLACE), 
## +     show_guide = FALSE) + geom_bar(stat = "bin")
## 
## > p4 <- p4 + scale_y_continuous(limits = c(0, 1250)) + 
## +     theme(legend.position = "none") + labs(x = "State", y = "Number of Level 4 and Above Ear ..." ... [TRUNCATED] 
## 
## > require(grid)
## Loading required package: grid

## 
## > p2

## 
## > pushViewport(viewport(layout = grid.layout(1, 2)))
## 
## > print(p3, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
## 
## > print(p4, vp = viewport(layout.pos.row = 1, layout.pos.col = 2))

13.Include in your html file a PNG image that shows a characterization of the categorical columns (as group by plots) and measure columns (as histograms)

#source("../03 Visualizations/PNG .R", echo = TRUE)